package chapter04

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}


/**
 * Word count, sixth variant: per-token counts are aggregated with `combineByKey`.
 *
 * Author: yuhui
 * Date: 2024-10-29 11:37 AM
 */
object WordCount6 {

  /**
   * Reads `BookData/input/04data.txt`, splits every line on the full-width
   * punctuation marks `，`, `。` and `、`, counts how often each non-empty token
   * occurs and prints the resulting `(token, count)` pairs to standard output.
   *
   * The Spark context is always stopped before this method returns, even when
   * the job fails.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val config: SparkConf = new SparkConf().setMaster("local[*]").setAppName("combineByKey")
    val sc: SparkContext = new SparkContext(config)

    try {
      val lines: RDD[String] = sc.textFile("BookData/input/04data.txt")

      lines
        .coalesce(1)
        .flatMap(_.split("[，。、]"))
        // Blank lines and leading/adjacent delimiters make split() return ""
        // tokens; without this filter they would be counted as a word.
        .filter(_.nonEmpty)
        .map((_, 1))
        .combineByKey[Int](
          // createCombiner: the first value seen for a key starts its running count
          (first: Int) => first,
          // mergeValue: fold a further value for the key into the running count
          (acc: Int, one: Int) => acc + one,
          // mergeCombiners: add together two partial counts of the same key
          (left: Int, right: Int) => left + right
        )
        .collect()
        .foreach(println)
    } finally {
      // Release the context's resources regardless of how the job ended.
      sc.stop()
    }
  }
}